# Joe Kelley - jkelley38@gatech.edu
# CS-6440-O01 - Mini-Project 2
# Please pull the live copy from https://github.com/JoeKelleygt/Mini-Project-2
# All instructions are located within Mini-project 2.pdf
# Imports
import pandas as pd
import numpy as np
import plotly.offline as pyo
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
# Enable plotly's offline notebook rendering before any figure is shown.
pyo.init_notebook_mode()

# Historical pneumonia / influenza deaths. Source:
# https://catalog.data.gov/dataset/deaths-from-pneumonia-and-influenza-pi-and-all-deaths-by-state-and-region-national-center-
# The CSV was trimmed by hand to the National rows (no state breakdown) and
# to the "All" age group only.
df_pnaDeaths = pd.read_csv(filepath_or_buffer="../data/National_Pneumonia_Deaths.csv")

# 'MMWR Year/Week' packs year and week together (e.g. 201838); keep only the
# trailing two characters so the graphs can use the week on its own.
df_pnaDeaths['Week'] = (
    df_pnaDeaths['MMWR Year/Week']
    .astype(str)
    .str.slice(start=-2)
)
display(df_pnaDeaths)
| geoid | Region | State | age | season | MMWR Year/Week | Deaths from influenza | Deaths from pneumonia | Deaths from pneumonia and influenza | All Deaths | Pecent of deaths due to pneumonia or influenza | pecent complete | Week | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | National | NaN | NaN | All | 2017-18 | 201838 | 11 | 2808 | 2819 | 51480 | 5.475913 | 108.197858 | 38 |
| 1 | National | NaN | NaN | All | 2017-18 | 201815 | 212 | 3588 | 3800 | 55165 | 6.888426 | 115.942791 | 15 |
| 2 | National | NaN | NaN | All | 2012-13 | 201318 | 10 | 3504 | 3514 | 48368 | 7.265134 | 101.657226 | 18 |
| 3 | National | NaN | NaN | All | 2009-10 | 201027 | 3 | 3215 | 3218 | 45580 | 7.060114 | 95.797560 | 27 |
| 4 | National | NaN | NaN | All | 2010-11 | 201120 | 7 | 3575 | 3582 | 46635 | 7.680926 | 98.014901 | 20 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 489 | National | NaN | NaN | All | 2016-17 | 201640 | 18 | 3022 | 3040 | 51403 | 5.914052 | 108.036024 | 40 |
| 490 | National | NaN | NaN | All | 2016-17 | 201648 | 34 | 3515 | 3549 | 54594 | 6.500714 | 114.742694 | 48 |
| 491 | National | NaN | NaN | All | 2015-16 | 201601 | 32 | 4245 | 4277 | 55843 | 7.658972 | 117.367774 | 01 |
| 492 | National | NaN | NaN | All | 2015-16 | 201633 | 5 | 2866 | 2871 | 49809 | 5.764019 | 104.685842 | 33 |
| 493 | National | NaN | NaN | All | 2016-17 | 201645 | 19 | 3209 | 3228 | 52533 | 6.144709 | 110.410996 | 45 |
494 rows × 13 columns
# Figure 1
# United States historical data: weekly pneumonia deaths, one color per flu season.
fig_pnaPrev = px.scatter(
    data_frame=df_pnaDeaths,
    x='Week',
    y='Deaths from pneumonia',
    color='season',
)
# The title is applied after construction, not through px.scatter.
fig_pnaPrev.update_layout(
    title="Yearly U.S. Pneumonia Deaths by Week - ICD-10 codes J12.0-J18.9",
)
display(fig_pnaPrev)
# Provisional COVID-19 death counts by week-ending date and state. Source:
# https://data.cdc.gov/NCHS/Provisional-COVID-19-Death-Counts-by-Week-Ending-D/r8kw-7aab
df_covidUS = pd.read_csv(
    filepath_or_buffer="../data/Provisional_COVID-19_Death_Counts_by_Week_Ending_Date_and_State.csv"
)

# Derived column: pneumonia deaths minus those also coded as COVID-19.
# The result is NaN wherever either input count is missing.
df_covidUS["pna - pna_covid"] = df_covidUS["Pneumonia Deaths"].sub(
    df_covidUS["Pneumonia and COVID-19 Deaths"]
)
display(df_covidUS)
| Data as of | Start week | End Week | Group | State | Indicator | COVID-19 Deaths | Total Deaths | Percent of Expected Deaths | Pneumonia Deaths | Pneumonia and COVID-19 Deaths | Influenza Deaths | Pneumonia, Influenza, or COVID-19 Deaths | Footnote | pna - pna_covid | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 09/18/2020 | 02/01/2020 | 02/01/2020 | By week | United States | Week-ending | 0.0 | 58576.0 | 0.99 | 3796.0 | 0.0 | 479.0 | 4275.0 | NaN | 3796.0 |
| 1 | 09/18/2020 | 02/08/2020 | 02/08/2020 | By week | United States | Week-ending | 1.0 | 59296.0 | 0.99 | 3799.0 | 0.0 | 520.0 | 4320.0 | NaN | 3799.0 |
| 2 | 09/18/2020 | 02/15/2020 | 02/15/2020 | By week | United States | Week-ending | 0.0 | 58697.0 | 1.00 | 3824.0 | 0.0 | 558.0 | 4382.0 | NaN | 3824.0 |
| 3 | 09/18/2020 | 02/22/2020 | 02/22/2020 | By week | United States | Week-ending | 5.0 | 58734.0 | 1.01 | 3699.0 | 1.0 | 564.0 | 4267.0 | NaN | 3698.0 |
| 4 | 09/18/2020 | 02/29/2020 | 02/29/2020 | By week | United States | Week-ending | 9.0 | 59167.0 | 1.03 | 3822.0 | 5.0 | 654.0 | 4480.0 | NaN | 3817.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1777 | 09/18/2020 | 08/15/2020 | 08/15/2020 | By week | Puerto Rico | Week-ending | 64.0 | 287.0 | 0.52 | 76.0 | 45.0 | 0.0 | 95.0 | NaN | 31.0 |
| 1778 | 09/18/2020 | 08/22/2020 | 08/22/2020 | By week | Puerto Rico | Week-ending | 45.0 | 193.0 | 0.37 | 38.0 | 32.0 | 0.0 | 51.0 | NaN | 6.0 |
| 1779 | 09/18/2020 | 08/29/2020 | 08/29/2020 | By week | Puerto Rico | Week-ending | 23.0 | 113.0 | 0.21 | 29.0 | 17.0 | 0.0 | 35.0 | NaN | 12.0 |
| 1780 | 09/18/2020 | 09/05/2020 | 09/05/2020 | By week | Puerto Rico | Week-ending | 36.0 | 69.0 | 0.13 | 27.0 | 23.0 | NaN | 40.0 | One or more data cells have counts between 1–9... | 4.0 |
| 1781 | 09/18/2020 | 09/12/2020 | 09/12/2020 | By week | Puerto Rico | Week-ending | 10.0 | 12.0 | 0.02 | NaN | NaN | 0.0 | 10.0 | One or more data cells have counts between 1–9... | NaN |
1782 rows × 15 columns
# Figure 2
# 2020 pneumonia deaths per week-ending date, colored by the 'State' column.
fig_pna2020 = px.scatter(
    data_frame=df_covidUS,
    x='End Week',
    y='Pneumonia Deaths',
    color='State',
)
fig_pna2020.update_layout(
    title="2020 U.S. Pneumonia Deaths by Week - ICD-10 codes J12.0-J18.9",
)
display(fig_pna2020)
# Weekly NCHS mortality surveillance data (pneumonia / influenza deaths by
# year and week). Source:
# https://www.cdc.gov/flu/weekly/
df_pnaCompare = pd.read_csv(filepath_or_buffer="../data/NCHSData37.csv")
display(df_pnaCompare)
| Year | Week | Percent of Deaths Due to Pneumonia and Influenza | Expected | Threshold | All Deaths | Pneumonia Deaths | Influenza Deaths | |
|---|---|---|---|---|---|---|---|---|
| 0 | 2013 | 40 | 6.617957 | 6.36132 | 6.77011 | 47492 | 3140 | 3 |
| 1 | 2013 | 41 | 6.652714 | 6.45326 | 6.86185 | 47304 | 3135 | 12 |
| 2 | 2013 | 42 | 6.779127 | 6.55439 | 6.96279 | 47602 | 3216 | 11 |
| 3 | 2013 | 43 | 6.622544 | 6.66322 | 7.07142 | 47746 | 3151 | 11 |
| 4 | 2013 | 44 | 6.730631 | 6.77811 | 7.18613 | 48777 | 3271 | 12 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 358 | 2020 | 33 | 10.072165 | 5.13191 | 5.46758 | 58061 | 5843 | 5 |
| 359 | 2020 | 34 | 9.366954 | 5.12719 | 5.46286 | 54372 | 5085 | 8 |
| 360 | 2020 | 35 | 8.507472 | 5.13611 | 5.47178 | 48381 | 4106 | 10 |
| 361 | 2020 | 36 | 6.918100 | 5.15856 | 5.49423 | 36325 | 2509 | 4 |
| 362 | 2020 | 37 | 5.285226 | 5.19422 | 5.52989 | 18126 | 957 | 1 |
363 rows × 8 columns
# Figure 3
# U.S. pneumonia deaths per week, comparing 2020 against the earlier seasons
# in df_pnaCompare. Marker color encodes the 'Year' column on the Sunset
# colorscale, with the color bar shown.
year_markers = dict(
    color=df_pnaCompare['Year'],
    colorscale='Sunset',
    showscale=True,
)
weekly_pna_trace = go.Scatter(
    x=df_pnaCompare['Week'],
    y=df_pnaCompare['Pneumonia Deaths'],
    mode='markers',
    marker=year_markers,
)
fig_pnaComparison = go.Figure(data=weekly_pna_trace)
fig_pnaComparison.update_layout(
    title="U.S. Pneumonia Deaths 2020 vs Previous 6 Flu Seasons",
)
fig_pnaComparison.show()
# Figure 4
# 2020 COVID-19 deaths per week-ending date, colored by the 'State' column.
fig_covid2020 = px.scatter(
    data_frame=df_covidUS,
    x='End Week',
    y='COVID-19 Deaths',
    color='State',
)
fig_covid2020.update_layout(
    title="2020 U.S. Deaths involving COVID-19 - ICD-10 Code U07.1",
)
display(fig_covid2020)
# Figure 5
# 2020 deaths with both pneumonia and COVID-19 (ICD-10 codes J12.0-J18.9 and
# U07.1) per week-ending date, colored by the 'State' column.
fig_covid_pna2020 = px.scatter(
    data_frame=df_covidUS,
    x='End Week',
    y='Pneumonia and COVID-19 Deaths',
    color='State',
)
fig_covid_pna2020.update_layout(
    title="2020 U.S. Deaths with Pneumonia and COVID-19 - ICD-10 codes J12.0-J18.9 and U07.1",
)
display(fig_covid_pna2020)
# Figure 6
# 2020 influenza deaths (ICD-10 codes J09-J11) per week-ending date, colored
# by the 'State' column.
fig_inf2020 = px.scatter(
    data_frame=df_covidUS,
    x='End Week',
    y='Influenza Deaths',
    color='State',
)
fig_inf2020.update_layout(
    title="2020 U.S. Influenza Deaths - ICD-10 codes J09-J11",
)
display(fig_inf2020)
# Figure 7
# 2020 deaths with pneumonia, influenza, or COVID-19 (ICD-10 codes
# J12.0-J18.9, J09-J11, U07.1) per week-ending date, colored by the 'State'
# column.
fig_covid_pna_inf_2020 = px.scatter(
    data_frame=df_covidUS,
    x='End Week',
    y='Pneumonia, Influenza, or COVID-19 Deaths',
    color='State',
)
fig_covid_pna_inf_2020.update_layout(
    title="2020 U.S. Deaths with Pneumonia, Influenza, or COVID-19 Deaths - ICD-10 codes J12.0-J18.9, J09-J11, U07.1",
)
display(fig_covid_pna_inf_2020)
# Figure 8
# Side-by-side reference view of four of the series plotted above (the ones
# from Figures 2, 4, 5 and 7), drawn as two 1x2 subplot grids.
# Each inner tuple lists the df_covidUS columns for one grid, left to right;
# the column name doubles as the subplot title.
panel_columns = (
    ('Pneumonia Deaths', 'COVID-19 Deaths'),
    ('Pneumonia and COVID-19 Deaths', 'Pneumonia, Influenza, or COVID-19 Deaths'),
)
subs1, subs2 = [
    make_subplots(rows=1, cols=2, subplot_titles=titles)
    for titles in panel_columns
]

for grid, titles in zip((subs1, subs2), panel_columns):
    for col_idx, series_name in enumerate(titles, start=1):
        # Markers are shaded by their own y-value on the 'deep' colorscale.
        grid.add_trace(
            go.Scatter(
                x=df_covidUS['End Week'],
                y=df_covidUS[series_name],
                mode='markers',
                marker=dict(color=df_covidUS[series_name], colorscale='deep'),
            ),
            row=1,
            col=col_idx,
        )
    grid.update_layout(showlegend=False)

# Render the two grids in order.
for grid in (subs1, subs2):
    grid.show()
# Figure 9
# Pneumonia deaths (ICD-10 codes J12.0-J18.9) after subtracting the deaths
# that were also classified with the COVID-19 code U07.1, i.e. the
# precomputed 'pna - pna_covid' column, to see how the remainder looks.
fig_pna_noCovid = px.scatter(
    data_frame=df_covidUS,
    x='End Week',
    y='pna - pna_covid',
    color='State',
)
fig_pna_noCovid.update_layout(
    title="2020 U.S. Pneumonia Deaths Without COVID-19 ICD-10 Code U07.1",
)
display(fig_pna_noCovid)
# Combined data set: historical year-over-year U.S. pneumonia deaths plus the
# Figure 9 series (2020 pneumonia deaths without ICD-10 code U07.1).
# The two sources were merged by hand in Excel rather than programmatically:
# the pandas data-frame combination ran into issues, and a two-minute copy
# was easier for this one comparison.
df_pnaDeathsCombined = pd.read_csv(
    filepath_or_buffer="../data/National_Pneumonia_Deaths_Modified.csv"
)

# Keep only the trailing two characters of 'MMWR Year/Week' so the graph can
# use the week on its own.
df_pnaDeathsCombined['Week'] = (
    df_pnaDeathsCombined['MMWR Year/Week']
    .astype(str)
    .str.slice(start=-2)
)
display(df_pnaDeathsCombined)
| geoid | Region | State | age | season | MMWR Year/Week | Deaths from influenza | Deaths from pneumonia | Deaths from pneumonia and influenza | All Deaths | Pecent of deaths due to pneumonia or influenza | pecent complete | Week | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | National | NaN | NaN | All | 2009-10 | 201027 | 3.0 | 3215 | 3218.0 | 45580.0 | 7.060114 | 95.797560 | 27 |
| 1 | National | NaN | NaN | All | 2009-10 | 200952 | 47.0 | 4310 | 4357.0 | 49617.0 | 8.781264 | 104.282306 | 52 |
| 2 | National | NaN | NaN | All | 2009-10 | 200941 | 174.0 | 3703 | 3877.0 | 46453.0 | 8.346070 | 97.632384 | 41 |
| 3 | National | NaN | NaN | All | 2009-10 | 201032 | 5.0 | 2928 | 2933.0 | 44400.0 | 6.605856 | 93.317500 | 32 |
| 4 | National | NaN | NaN | All | 2009-10 | 201019 | 4.0 | 3464 | 3468.0 | 46362.0 | 7.480264 | 97.441125 | 19 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 522 | NaN | NaN | NaN | NaN | 2020 | 33 | NaN | 2595 | NaN | NaN | NaN | NaN | 33 |
| 523 | NaN | NaN | NaN | NaN | 2020 | 34 | NaN | 2434 | NaN | NaN | NaN | NaN | 34 |
| 524 | NaN | NaN | NaN | NaN | 2020 | 35 | NaN | 2134 | NaN | NaN | NaN | NaN | 35 |
| 525 | NaN | NaN | NaN | NaN | 2020 | 36 | NaN | 1711 | NaN | NaN | NaN | NaN | 36 |
| 526 | NaN | NaN | NaN | NaN | 2020 | 37 | NaN | 955 | NaN | NaN | NaN | NaN | 37 |
527 rows × 13 columns
# Figure 10
# How the 2020 pneumonia data (without U07.1) compares to historical trends:
# weekly pneumonia deaths from the combined data set, one color per season.
fig_pnaComb = px.scatter(
    data_frame=df_pnaDeathsCombined,
    x='Week',
    y='Deaths from pneumonia',
    color='season',
)
fig_pnaComb.update_layout(
    title="Annual U.S. Pneumonia Deaths by Week - ICD-10 codes J12.0-J18.9 NOT Including U07.1",
)
fig_pnaComb.show()
# CDC Nationally Notifiable Infectious Diseases and Conditions, United
# States: Weekly Tables - used to see how case counts during COVID-19's peak
# weeks compare to the trend of previous years.
# https://wonder.cdc.gov/nndss/nndss_weekly_tables_menu.asp
#
# Infectious diseases data: Hepatitis A
df_hepA = pd.read_csv(filepath_or_buffer="../data/infectious disease/hepA/hepA.csv")
display(df_hepA)
| Year | Week | Hepatitis Type A Cases | |
|---|---|---|---|
| 0 | Y - 2016 | 13 | 14 |
| 1 | Y - 2016 | 14 | 4 |
| 2 | Y - 2016 | 15 | 11 |
| 3 | Y - 2016 | 16 | 19 |
| 4 | Y - 2016 | 17 | 18 |
| 5 | Y - 2016 | 18 | 11 |
| 6 | Y - 2017 | 13 | 17 |
| 7 | Y - 2017 | 14 | 12 |
| 8 | Y - 2017 | 15 | 13 |
| 9 | Y - 2017 | 16 | 8 |
| 10 | Y - 2017 | 17 | 8 |
| 11 | Y - 2017 | 18 | 22 |
| 12 | Y - 2018 | 13 | 52 |
| 13 | Y - 2018 | 14 | 50 |
| 14 | Y - 2018 | 15 | 47 |
| 15 | Y - 2018 | 16 | 57 |
| 16 | Y - 2018 | 17 | 81 |
| 17 | Y - 2018 | 18 | 59 |
| 18 | Y - 2019 | 13 | 190 |
| 19 | Y - 2019 | 14 | 137 |
| 20 | Y - 2019 | 15 | 131 |
| 21 | Y - 2019 | 16 | 149 |
| 22 | Y - 2019 | 17 | 192 |
| 23 | Y - 2019 | 18 | 142 |
| 24 | Y - 2020 | 13 | 18 |
| 25 | Y - 2020 | 14 | 30 |
| 26 | Y - 2020 | 15 | 28 |
| 27 | Y - 2020 | 16 | 31 |
| 28 | Y - 2020 | 17 | 38 |
| 29 | Y - 2020 | 18 | 13 |
# Figure 11 - Hepatitis A
# Grouped bars: weekly case counts, one bar per year within each week.
fig_hepA = px.bar(
    data_frame=df_hepA,
    x='Week',
    y='Hepatitis Type A Cases',
    color='Year',
    barmode='group',
)
fig_hepA.update_layout(
    # Linear ticks spaced 1.0 apart, so every week number gets its own tick.
    xaxis={'tickmode': 'linear', 'dtick': 1.0},
    title="Hepatitis A Weekly Cases YoY (Weeks 13-18)",
)
display(fig_hepA)
# Infectious diseases data: Chlamydia Trachomatis weekly case counts.
df_ct = pd.read_csv(
    filepath_or_buffer="../data/infectious disease/chlamydia/chlamydia_data.csv"
)
display(df_ct)
| Year | Week | Chlamydia Trachomatis Cases | |
|---|---|---|---|
| 0 | Y - 2016 | 13 | 14552 |
| 1 | Y - 2016 | 14 | 16154 |
| 2 | Y - 2016 | 15 | 16220 |
| 3 | Y - 2016 | 16 | 14126 |
| 4 | Y - 2016 | 17 | 15551 |
| 5 | Y - 2016 | 18 | 16725 |
| 6 | Y - 2017 | 13 | 14739 |
| 7 | Y - 2017 | 14 | 15810 |
| 8 | Y - 2017 | 15 | 12684 |
| 9 | Y - 2017 | 16 | 15778 |
| 10 | Y - 2017 | 17 | 15756 |
| 11 | Y - 2017 | 18 | 15819 |
| 12 | Y - 2018 | 13 | 12158 |
| 13 | Y - 2018 | 14 | 14268 |
| 14 | Y - 2018 | 15 | 15402 |
| 15 | Y - 2018 | 16 | 13094 |
| 16 | Y - 2018 | 17 | 13263 |
| 17 | Y - 2018 | 18 | 16466 |
| 18 | Y - 2019 | 13 | 11054 |
| 19 | Y - 2019 | 14 | 11493 |
| 20 | Y - 2019 | 15 | 9459 |
| 21 | Y - 2019 | 16 | 11129 |
| 22 | Y - 2019 | 17 | 12711 |
| 23 | Y - 2019 | 18 | 11859 |
| 24 | Y - 2020 | 13 | 5896 |
| 25 | Y - 2020 | 14 | 6883 |
| 26 | Y - 2020 | 15 | 4884 |
| 27 | Y - 2020 | 16 | 7085 |
| 28 | Y - 2020 | 17 | 6090 |
| 29 | Y - 2020 | 18 | 6371 |
# Figure 12 - Chlamydia Trachomatis
# Grouped bars: weekly case counts, one bar per year within each week.
fig_ct = px.bar(
    data_frame=df_ct,
    x='Week',
    y='Chlamydia Trachomatis Cases',
    color='Year',
    barmode='group',
)
fig_ct.update_layout(
    # Linear ticks spaced 1.0 apart, so every week number gets its own tick.
    xaxis={'tickmode': 'linear', 'dtick': 1.0},
    title="Chlamydia Trachomatis Weekly Cases YoY (Weeks 13-18)",
)
display(fig_ct)
# Infectious diseases data: Giardiasis weekly case counts.
df_gd = pd.read_csv(
    filepath_or_buffer="../data/infectious disease/giardiasis/giardiasis_cases.csv"
)
display(df_gd)
| Year | Week | Giardiasis Cases | |
|---|---|---|---|
| 0 | Y - 2016 | 13 | 124 |
| 1 | Y - 2016 | 14 | 141 |
| 2 | Y - 2016 | 15 | 163 |
| 3 | Y - 2016 | 16 | 143 |
| 4 | Y - 2016 | 17 | 138 |
| 5 | Y - 2016 | 18 | 138 |
| 6 | Y - 2017 | 13 | 117 |
| 7 | Y - 2017 | 14 | 126 |
| 8 | Y - 2017 | 15 | 80 |
| 9 | Y - 2017 | 16 | 93 |
| 10 | Y - 2017 | 17 | 124 |
| 11 | Y - 2017 | 18 | 131 |
| 12 | Y - 2018 | 13 | 80 |
| 13 | Y - 2018 | 14 | 106 |
| 14 | Y - 2018 | 15 | 66 |
| 15 | Y - 2018 | 16 | 88 |
| 16 | Y - 2018 | 17 | 107 |
| 17 | Y - 2018 | 18 | 75 |
| 18 | Y - 2019 | 13 | 91 |
| 19 | Y - 2019 | 14 | 83 |
| 20 | Y - 2019 | 15 | 84 |
| 21 | Y - 2019 | 16 | 117 |
| 22 | Y - 2019 | 17 | 121 |
| 23 | Y - 2019 | 18 | 72 |
| 24 | Y - 2020 | 13 | 42 |
| 25 | Y - 2020 | 14 | 34 |
| 26 | Y - 2020 | 15 | 35 |
| 27 | Y - 2020 | 16 | 34 |
| 28 | Y - 2020 | 17 | 21 |
| 29 | Y - 2020 | 18 | 39 |
# Figure 13 - Giardiasis
# Grouped bars: weekly case counts, one bar per year within each week.
fig_gd = px.bar(
    data_frame=df_gd,
    x='Week',
    y='Giardiasis Cases',
    color='Year',
    barmode='group',
)
fig_gd.update_layout(
    # Linear ticks spaced 1.0 apart, so every week number gets its own tick.
    xaxis={'tickmode': 'linear', 'dtick': 1.0},
    title="Giardiasis Weekly Cases YoY (Weeks 13-18)",
)
display(fig_gd)